; This file is meant to be included twice with different settings of
; ENEIDA_UTIL_INCLUDE: 'DATA' emits the constant tables below, 'CODE' defines
; the am_* math macros that read those tables.
if ENEIDA_UTIL_INCLUDE = 'DATA'

; The macros load these tables with aligned 128-bit accesses (movaps, movdqa
; and dqword memory operands), so the tables must start on a 16-byte boundary.
align 16

; public data
; Each constant in this group replicates one value across four dwords
; (16 bytes). Names containing i32 hold integers, names containing f32 hold
; single-precision floats; am_v4_0 is all-zero bits.
am_v4_0         dd 4 dup 0
am_v4_f32_0p5   dd 4 dup 0.5
am_v4_i32_1     dd 4 dup 1
am_v4_f32_1     dd 4 dup 1.0
am_v4_f32_2     dd 4 dup 2.0
am_v4_f32_m1    dd 4 dup -1.0
am_v4_i32_2     dd 4 dup 2
am_v4_i32_7     dd 4 dup 7
; pi/2, pi and 2*pi
am_v4_f32_0p5pi dd 4 dup 1.57079633
am_v4_f32_pi    dd 4 dup 3.1415926535897
am_v4_f32_2pi   dd 4 dup 6.28318531

; private data
; Single-precision bit masks and scale factors shared by the am_* macros.
; 0x00800000 has exponent field 1 and mantissa 0; the log and pow macros
; clamp their input to at least this value.
_am_min_norm_pos    dd 4 dup 0x00800000
; Complement of the exponent field 0x7f800000: keeps sign and mantissa bits.
_am_inv_mant_mask   dd 4 dup not 0x7f800000
; 127, added to or subtracted from the exponent field by the exp, log and
; pow macros.
_am_0x7f            dd 4 dup 0x7f
; Sign bit only, and everything except the sign bit.
_am_sign_mask       dd 4 dup 0x80000000
_am_inv_sign_mask   dd 4 dup not 0x80000000
; 2/pi, 4/pi, pi/2 and pi/4
_am_2_div_pi        dd 4 dup 0.636619772
_am_4_div_pi        dd 4 dup 1.2732395
_am_pi_div_2        dd 4 dup 1.5707963
_am_pi_div_4        dd 4 dup 0.7853982
; All-ones in the fourth dword only. Not referenced by the macros in the
; 'CODE' part of this file.
_am_mask_w          dd 0x00000000, 0x00000000, 0x00000000, 0xffffffff

; Constants for am_exp_ss / am_exp_ps.
; hi / lo: the input is clamped to this range before anything else.
_am_exp_hi          dd 4 dup 88.3762626647949
_am_exp_lo          dd 4 dup -88.3762626647949
; 1 / ln(2): scales the input before the integer part n is extracted.
_am_exp_rln2        dd 4 dup 1.4426950408889634073599
; p*, q*: coefficients of the numerator and denominator polynomials
; evaluated on the reduced argument.
_am_exp_p0          dd 4 dup 1.26177193074810590878e-4
_am_exp_p1          dd 4 dup 3.02994407707441961300e-2
_am_exp_q0          dd 4 dup 3.00198505138664455042e-6
_am_exp_q1          dd 4 dup 2.52448340349684104192e-3
_am_exp_q2          dd 4 dup 2.27265548208155028766e-1
_am_exp_q3          dd 4 dup 2.00000000000000000009e0
; c1 + c2 add up to ln(2); n*c2 and n*c1 are subtracted from the input in
; two separate steps.
_am_exp_c1          dd 4 dup 6.93145751953125e-1
_am_exp_c2          dd 4 dup 1.42860682030941723212e-6

; Constants for the logarithm core shared by am_log_*, am_log2_* and am_pow_*.
; p*, q*: coefficients of the numerator and denominator polynomials.
_am_log_p0      dd 4 dup -7.89580278884799154124e-1
_am_log_p1      dd 4 dup 1.63866645699558079767e1
_am_log_p2      dd 4 dup -6.41409952958715622951e1
_am_log_q0      dd 4 dup -3.56722798256324312549e1
_am_log_q1      dd 4 dup 3.12093766372244180303e2
_am_log_q2      dd 4 dup -7.69691943550460008604e2
; ln(2): am_log_* multiplies the unbiased exponent by this value.
_am_log_c0      dd 4 dup 0.693147180559945

; Constants for am_exp2_ss / am_exp2_ps and the exp2 stage of am_pow_*.
; hi / lo: the exp2 argument is clamped to this range.
_am_exp2_hi     dd 4 dup 127.4999961853
_am_exp2_lo     dd 4 dup -127.4999961853
; p*, q*: coefficients of the numerator and denominator polynomials.
_am_exp2_p0     dd 4 dup 2.30933477057345225087e-2
_am_exp2_p1     dd 4 dup 2.02020656693165307700e1
_am_exp2_p2     dd 4 dup 1.51390680115615096133e3
_am_exp2_q0     dd 4 dup 2.33184211722314911771e2
_am_exp2_q1     dd 4 dup 4.36821166879210612817e3

; 1 / ln(2): am_log2_* and am_pow_* multiply the mantissa part of the
; logarithm by this value.
_am_log2_c0     dd 4 dup 1.44269504088896340735992

; Polynomial coefficients shared by am_sin_*, am_cos_* and am_sincos_*.
; The macros evaluate y * (p0 + y^2 * (p1 + y^2 * (p2 + y^2 * p3))) on an
; argument that was pre-scaled by 2/pi; p0 is pi/2.
_am_sincos_p0   dd 4 dup 0.15707963267948963959e1
_am_sincos_p1   dd 4 dup -0.64596409750621907082e0
_am_sincos_p2   dd 4 dup 0.7969262624561800806e-1
_am_sincos_p3   dd 4 dup -0.468175413106023168e-2

; Constants for am_tan_ss / am_tan_ps: numerator (p*) and denominator (q*)
; polynomial coefficients, plus the magnitude substituted at a pole.
;
; Each value is replicated across four dwords like every other table in this
; file. am_tan_ps reads all of them as 16-byte packed operands (movaps and
; addps with dqword memory operands), so a single dword per constant would
; feed neighbouring coefficients into lanes 1-3 and leave most of the
; constants off a 16-byte boundary, which faults on aligned SSE accesses.
; am_tan_ss only reads the first dword of each entry and is unaffected.
_am_tan_p0      dd 4 dup -1.79565251976484877988e7
_am_tan_p1      dd 4 dup 1.15351664838587416140e6
_am_tan_p2      dd 4 dup -1.30936939181383777646e4
_am_tan_q0      dd 4 dup -5.38695755929454629881e7
_am_tan_q1      dd 4 dup 2.50083801823357915839e7
_am_tan_q2      dd 4 dup -1.32089234440210967447e6
_am_tan_q3      dd 4 dup 1.36812963470692954678e4
_am_tan_poleval dd 4 dup 3.68935e19

; Constants for am_atan_* and am_atanr2_*. The macros evaluate a nested
; quotient on w = z^2: each stage adds an s* value to w and to the previous
; stage, takes the reciprocal and multiplies by the matching t* value.
_am_atan_t0     dd 4 dup -0.91646118527267623468e-1
_am_atan_t1     dd 4 dup -0.13956945682312098640e1
_am_atan_t2     dd 4 dup -0.94393926122725531747e2
_am_atan_t3     dd 4 dup 0.12888383034157279340e2
_am_atan_s0     dd 4 dup 0.12797564625607904396e1
_am_atan_s1     dd 4 dup 0.21972168858277355914e1
_am_atan_s2     dd 4 dup 0.68193064729268275701e1
_am_atan_s3     dd 4 dup 0.28205206687035841409e2

else if ENEIDA_UTIL_INCLUDE = 'CODE'

; am_sin_ss - scalar single-precision sine approximation.
;   in:       xmm0 (low lane) = x; it is scaled by 2/pi, i.e. treated as radians
;   out:      xmm0 (low lane) = approximation of sin(x)
;   clobbers: xmm1-xmm7
; Method: t = |x| * 2/pi is split into an integer quadrant q and a fraction f.
; With y = f for even q and y = 1 - f for odd q the result is
;   s * y * (p0 + y^2 * (p1 + y^2 * (p2 + y^2 * p3)))
; where the sign s is the sign of x, flipped when bit 1 of q is set.
macro am_sin_ss {
    ; separate the sign of x and scale |x| by 2/pi
    movaps    xmm7, xmm0
    movss     xmm1, [_am_inv_sign_mask]
    movss     xmm2, [_am_sign_mask]
    movss     xmm3, [_am_2_div_pi]
    andps     xmm0, xmm1
    andps     xmm7, xmm2
    mulss     xmm0, xmm3

    ; q = trunc(t); xmm5 becomes all-ones when q is even
    pxor      xmm3, xmm3
    movd      xmm5, [am_v4_i32_1]
    movss     xmm4, [am_v4_f32_1]
    cvttps2dq xmm2, xmm0
    pand      xmm5, xmm2
    movd      xmm1, [am_v4_i32_2]
    pcmpeqd   xmm5, xmm3
    cvtdq2ps  xmm6, xmm2

    ; move bit 1 of q into the sign-bit position
    pand      xmm2, xmm1
    pslld     xmm2, 30

    ; f = t - q, limited to at most 1.0; y = f (even q) or 1 - f (odd q)
    subss     xmm0, xmm6
    movss     xmm3, [_am_sincos_p3]
    minss     xmm0, xmm4
    subss     xmm4, xmm0
    andps     xmm0, xmm5
    andnps    xmm5, xmm4
    orps      xmm0, xmm5

    ; xmm1 = y carrying the result sign, xmm7 = y^2
    movaps    xmm1, xmm0
    movss     xmm4, [_am_sincos_p2]
    mulss     xmm0, xmm0
    xorps     xmm2, xmm7
    movss     xmm5, [_am_sincos_p1]
    orps      xmm1, xmm2
    movaps    xmm7, xmm0

    ; Horner evaluation in y^2, then multiply by the signed y
    mulss     xmm0, xmm3
    movss     xmm6, [_am_sincos_p0]
    addss     xmm0, xmm4
    mulss     xmm0, xmm7
    addss     xmm0, xmm5
    mulss     xmm0, xmm7
    addss     xmm0, xmm6
    mulss     xmm0, xmm1
}

; am_sin_ps - packed (4 x single) sine approximation.
;   in:       xmm0 = four values; each is scaled by 2/pi, i.e. treated as radians
;   out:      xmm0 = approximation of sin for each lane
;   clobbers: xmm1-xmm7
; Same scheme as the scalar variant: per lane, t = |x| * 2/pi is split into
; quadrant q and fraction f, y = f for even q or 1 - f for odd q, and the
; polynomial y * (p0 + y^2 * (p1 + y^2 * (p2 + y^2 * p3))) gets the sign of x
; flipped when bit 1 of q is set.
macro am_sin_ps {
    ; separate the sign and scale |x| by 2/pi
    movaps    xmm7, xmm0
    andps     xmm0, dqword [_am_inv_sign_mask]
    andps     xmm7, dqword [_am_sign_mask]
    mulps     xmm0, dqword [_am_2_div_pi]

    ; q = trunc(t); xmm5 = all-ones in lanes where q is even
    pxor      xmm3, xmm3
    movdqa    xmm5, dqword [am_v4_i32_1]
    movaps    xmm4, dqword [am_v4_f32_1]
    cvttps2dq xmm2, xmm0
    pand      xmm5, xmm2
    pcmpeqd   xmm5, xmm3
    cvtdq2ps  xmm6, xmm2

    ; move bit 1 of q into the sign-bit position
    pand      xmm2, dqword [am_v4_i32_2]
    pslld     xmm2, 30

    ; f = t - q, limited to at most 1.0; y = f (even q) or 1 - f (odd q)
    subps     xmm0, xmm6
    minps     xmm0, xmm4
    subps     xmm4, xmm0
    andps     xmm0, xmm5
    andnps    xmm5, xmm4
    orps      xmm0, xmm5

    ; xmm1 = y carrying the result sign, xmm7 = y^2
    movaps    xmm1, xmm0
    mulps     xmm0, xmm0
    xorps     xmm2, xmm7
    orps      xmm1, xmm2
    movaps    xmm7, xmm0

    ; Horner evaluation in y^2, then multiply by the signed y
    mulps     xmm0, dqword [_am_sincos_p3]
    addps     xmm0, dqword [_am_sincos_p2]
    mulps     xmm0, xmm7
    addps     xmm0, dqword [_am_sincos_p1]
    mulps     xmm0, xmm7
    addps     xmm0, dqword [_am_sincos_p0]
    mulps     xmm0, xmm1
}

; am_cos_ss - scalar single-precision cosine approximation.
;   in:       xmm0 (low lane) = x, treated as radians
;   out:      xmm0 (low lane) = approximation of cos(x)
;   clobbers: xmm1-xmm7
; Method: the sine scheme is applied to |x| + pi/2. t = (|x| + pi/2) * 2/pi
; is split into quadrant q and fraction f, y = f for even q or 1 - f for odd
; q, and y * (p0 + y^2 * (p1 + y^2 * (p2 + y^2 * p3))) is negated when bit 1
; of q is set.
macro am_cos_ss {
    ; t = (|x| + pi/2) * 2/pi
    movss     xmm1, [_am_inv_sign_mask]
    movss     xmm2, [_am_pi_div_2]
    movss     xmm3, [_am_2_div_pi]
    andps     xmm0, xmm1
    addss     xmm0, xmm2
    mulss     xmm0, xmm3

    ; q = trunc(t); xmm5 becomes all-ones when q is even
    pxor      xmm3, xmm3
    movd      xmm5, [am_v4_i32_1]
    movss     xmm4, [am_v4_f32_1]
    cvttps2dq xmm2, xmm0
    pand      xmm5, xmm2
    movd      xmm1, [am_v4_i32_2]
    pcmpeqd   xmm5, xmm3
    cvtdq2ps  xmm6, xmm2

    ; move bit 1 of q into the sign-bit position
    pand      xmm2, xmm1
    pslld     xmm2, 30

    ; f = t - q, limited to at most 1.0; y = f (even q) or 1 - f (odd q)
    subss     xmm0, xmm6
    movss     xmm3, [_am_sincos_p3]
    minss     xmm0, xmm4
    subss     xmm4, xmm0
    andps     xmm0, xmm5
    andnps    xmm5, xmm4
    orps      xmm0, xmm5

    ; xmm1 = y carrying the result sign, xmm7 = y^2
    movaps    xmm1, xmm0
    movss     xmm4, [_am_sincos_p2]
    mulss     xmm0, xmm0
    movss     xmm5, [_am_sincos_p1]
    orps      xmm1, xmm2
    movaps    xmm7, xmm0

    ; Horner evaluation in y^2, then multiply by the signed y
    mulss     xmm0, xmm3
    movss     xmm6, [_am_sincos_p0]
    addss     xmm0, xmm4
    mulss     xmm0, xmm7
    addss     xmm0, xmm5
    mulss     xmm0, xmm7
    addss     xmm0, xmm6
    mulss     xmm0, xmm1
}

; am_cos_ps - packed (4 x single) cosine approximation.
;   in:       xmm0 = four values, treated as radians
;   out:      xmm0 = approximation of cos for each lane
;   clobbers: xmm1-xmm7
; Per lane the sine scheme is applied to |x| + pi/2: quadrant q and fraction
; f of (|x| + pi/2) * 2/pi, y = f for even q or 1 - f for odd q, polynomial
; y * (p0 + y^2 * (p1 + y^2 * (p2 + y^2 * p3))), negated when bit 1 of q is
; set.
macro am_cos_ps {
    ; t = (|x| + pi/2) * 2/pi
    andps     xmm0, dqword [_am_inv_sign_mask]
    addps     xmm0, dqword [_am_pi_div_2]
    mulps     xmm0, dqword [_am_2_div_pi]

    ; q = trunc(t); xmm5 = all-ones in lanes where q is even
    pxor      xmm3, xmm3
    movdqa    xmm5, dqword [am_v4_i32_1]
    movaps    xmm4, dqword [am_v4_f32_1]
    cvttps2dq xmm2, xmm0
    pand      xmm5, xmm2
    pcmpeqd   xmm5, xmm3
    cvtdq2ps  xmm6, xmm2

    ; move bit 1 of q into the sign-bit position
    pand      xmm2, dqword [am_v4_i32_2]
    pslld     xmm2, 30

    ; f = t - q, limited to at most 1.0; y = f (even q) or 1 - f (odd q)
    subps     xmm0, xmm6
    minps     xmm0, xmm4
    subps     xmm4, xmm0
    andps     xmm0, xmm5
    andnps    xmm5, xmm4
    orps      xmm0, xmm5

    ; xmm1 = y carrying the result sign, xmm7 = y^2
    movaps    xmm1, xmm0
    mulps     xmm0, xmm0
    orps      xmm1, xmm2
    movaps    xmm7, xmm0

    ; Horner evaluation in y^2, then multiply by the signed y
    mulps     xmm0, dqword [_am_sincos_p3]
    addps     xmm0, dqword [_am_sincos_p2]
    mulps     xmm0, xmm7
    addps     xmm0, dqword [_am_sincos_p1]
    mulps     xmm0, xmm7
    addps     xmm0, dqword [_am_sincos_p0]
    mulps     xmm0, xmm1
}

; am_sincos_ss - scalar single-precision sine and cosine in one pass.
;   in:       xmm0 (low lane) = x, treated as radians
;   out:      xmm0 (low lane) = approximation of sin(x)
;             xmm1 (low lane) = approximation of cos(x)
;   clobbers: xmm2-xmm7
; Method: t = |x| * 2/pi is split into quadrant q and fraction f.
;   sine argument:   f for even q, 1 - f for odd q
;   cosine argument: 1 - f for even q, f for odd q
; Both arguments go through y * (p0 + y^2 * (p1 + y^2 * (p2 + y^2 * p3))).
; The sine sign is the sign of x flipped by bit 1 of q; the cosine sign is
; bit 1 of q + 1.
macro am_sincos_ss {
    ; separate the sign of x and scale |x| by 2/pi
    movaps    xmm7, xmm0
    movss     xmm1, [_am_inv_sign_mask]
    movss     xmm2, [_am_sign_mask]
    movss     xmm3, [_am_2_div_pi]
    andps     xmm0, xmm1
    andps     xmm7, xmm2
    mulss     xmm0, xmm3

    ; q = trunc(t); xmm5 becomes all-ones when q is even
    pxor      xmm3, xmm3
    movd      xmm5, [am_v4_i32_1]
    movss     xmm4, [am_v4_f32_1]
    cvttps2dq xmm2, xmm0
    pand      xmm5, xmm2
    movd      xmm1, [am_v4_i32_2]
    pcmpeqd   xmm5, xmm3

    ; xmm2 = bit 1 of q, xmm3 = bit 1 of q + 1, xmm6 = q as float
    movd      xmm3, [am_v4_i32_1]
    cvtdq2ps  xmm6, xmm2
    paddd     xmm3, xmm2
    pand      xmm2, xmm1
    pand      xmm3, xmm1

    ; f = t - q limited to at most 1.0, xmm4 = 1 - f; sign bits shifted to
    ; bit 31
    subss     xmm0, xmm6
    pslld     xmm2, 30
    minss     xmm0, xmm4
    subss     xmm4, xmm0
    pslld     xmm3, 30

    ; select the sine argument into xmm0 and the cosine argument into xmm6;
    ; xmm2 picks up the sign of x on the way
    movaps    xmm6, xmm4
    xorps     xmm2, xmm7
    movaps    xmm7, xmm5
    andps     xmm6, xmm7
    andnps    xmm7, xmm0
    andps     xmm0, xmm5
    andnps    xmm5, xmm4
    movss     xmm4, [_am_sincos_p3]
    orps      xmm6, xmm7
    orps      xmm0, xmm5

    ; xmm1 / xmm7 = signed arguments, xmm2 / xmm3 = their squares
    movss     xmm5, [_am_sincos_p2]
    movaps    xmm1, xmm0
    movaps    xmm7, xmm6
    mulss     xmm0, xmm0
    mulss     xmm6, xmm6
    orps      xmm1, xmm2
    orps      xmm7, xmm3
    movaps    xmm2, xmm0
    movaps    xmm3, xmm6

    ; the two Horner evaluations run side by side (sine in xmm0, cosine in
    ; xmm6)
    mulss     xmm0, xmm4
    mulss     xmm6, xmm4
    movss     xmm4, [_am_sincos_p1]
    addss     xmm0, xmm5
    addss     xmm6, xmm5
    movss     xmm5, [_am_sincos_p0]
    mulss     xmm0, xmm2
    mulss     xmm6, xmm3
    addss     xmm0, xmm4
    addss     xmm6, xmm4
    mulss     xmm0, xmm2
    mulss     xmm6, xmm3
    addss     xmm0, xmm5
    addss     xmm6, xmm5
    mulss     xmm0, xmm1
    mulss     xmm6, xmm7

    ; hand the cosine back in xmm1
    movaps    xmm1, xmm6
}

; am_sincos_ps - packed (4 x single) sine and cosine in one pass.
;   in:       xmm0 = four values, treated as radians
;   out:      xmm0 = approximation of sin for each lane
;             xmm1 = approximation of cos for each lane
;   clobbers: xmm2-xmm7
; Per lane, t = |x| * 2/pi is split into quadrant q and fraction f.
;   sine argument:   f for even q, 1 - f for odd q
;   cosine argument: 1 - f for even q, f for odd q
; Both arguments go through y * (p0 + y^2 * (p1 + y^2 * (p2 + y^2 * p3))).
; The sine sign is the sign of x flipped by bit 1 of q; the cosine sign is
; bit 1 of q + 1.
macro am_sincos_ps {
    ; separate the sign and scale |x| by 2/pi
    movaps    xmm7, xmm0
    andps     xmm0, dqword [_am_inv_sign_mask]
    andps     xmm7, dqword [_am_sign_mask]
    mulps     xmm0, dqword [_am_2_div_pi]

    ; q = trunc(t); xmm5 = all-ones in lanes where q is even
    pxor      xmm3, xmm3
    movdqa    xmm5, dqword [am_v4_i32_1]
    movaps    xmm4, dqword [am_v4_f32_1]
    cvttps2dq xmm2, xmm0
    pand      xmm5, xmm2
    pcmpeqd   xmm5, xmm3

    ; xmm2 = bit 1 of q, xmm3 = bit 1 of q + 1, xmm6 = q as float
    movdqa    xmm3, dqword [am_v4_i32_1]
    movdqa    xmm1, dqword [am_v4_i32_2]
    cvtdq2ps  xmm6, xmm2
    paddd     xmm3, xmm2
    pand      xmm2, xmm1
    pand      xmm3, xmm1

    ; f = t - q limited to at most 1.0, xmm4 = 1 - f; sign bits shifted to
    ; bit 31
    subps     xmm0, xmm6
    pslld     xmm2, 30
    minps     xmm0, xmm4
    subps     xmm4, xmm0
    pslld     xmm3, 30

    ; select the sine argument into xmm0 and the cosine argument into xmm6;
    ; xmm2 picks up the sign of x on the way
    movaps    xmm6, xmm4
    xorps     xmm2, xmm7
    movaps    xmm7, xmm5
    andps     xmm6, xmm7
    andnps    xmm7, xmm0
    andps     xmm0, xmm5
    andnps    xmm5, xmm4
    movaps    xmm4, dqword [_am_sincos_p3]
    orps      xmm6, xmm7
    orps      xmm0, xmm5

    ; xmm1 / xmm7 = signed arguments, xmm2 / xmm3 = their squares
    movaps    xmm5, dqword [_am_sincos_p2]
    movaps    xmm1, xmm0
    movaps    xmm7, xmm6
    mulps     xmm0, xmm0
    mulps     xmm6, xmm6
    orps      xmm1, xmm2
    orps      xmm7, xmm3
    movaps    xmm2, xmm0
    movaps    xmm3, xmm6

    ; the two Horner evaluations run side by side (sine in xmm0, cosine in
    ; xmm6)
    mulps     xmm0, xmm4
    mulps     xmm6, xmm4
    movaps    xmm4, dqword [_am_sincos_p1]
    addps     xmm0, xmm5
    addps     xmm6, xmm5
    movaps    xmm5, dqword [_am_sincos_p0]
    mulps     xmm0, xmm2
    mulps     xmm6, xmm3
    addps     xmm0, xmm4
    addps     xmm6, xmm4
    mulps     xmm0, xmm2
    mulps     xmm6, xmm3
    addps     xmm0, xmm5
    addps     xmm6, xmm5
    mulps     xmm0, xmm1
    mulps     xmm6, xmm7

    ; hand the cosine back in xmm1
    movaps    xmm1, xmm6
}

; am_tan_ss - scalar single-precision tangent approximation.
;   in:       xmm0 (low lane) = x, treated as radians
;   out:      xmm0 (low lane) = approximation of tan(x)
;   clobbers: eax, ecx, edx, flags, xmm1-xmm7
; Method: j = trunc(|x| * 4/pi) is rounded up to an even value, and
; z = |x| - j_even * pi/4 (limited to at most 1.0) is the reduced argument.
; With w = z^2 and z carrying the sign of x:
;   P = w * (p0 + w * (p1 + w * p2))
;   Q = q0 + w * (q1 + w * (q2 + w * (q3 + w)))
;   T = z + z * P / Q
; 1/Q comes from rcpss refined by one Newton-Raphson step. Bit 1 of
; (j and 7) + (j and 1) selects between returning T and returning -1/T.
; When -1/T is wanted and w compares equal to zero, a value built from
; _am_tan_poleval is returned instead.
macro am_tan_ss {
    local direct, at_pole, done

    ; eax = sign bit of x, xmm0 = |x| * 4/pi, xmm1 = |x|
    movss     xmm1, [_am_inv_sign_mask]
    movd      eax, xmm0
    andps     xmm0, xmm1
    movaps    xmm1, xmm0
    mulss     xmm0, [_am_4_div_pi]

    ; edx = j rounded up to even, eax = (j and 7) + (j and 1), xmm7 = sign
    cvttss2si edx, xmm0
    and       eax, 0x80000000
    mov       ecx, 0x1
    movd      xmm7, eax
    mov       eax, 0x7
    movss     xmm5, [am_v4_f32_1]
    and       ecx, edx
    and       eax, edx
    add       edx, ecx
    add       eax, ecx

    ; z = |x| - edx * pi/4, limited to at most 1.0; xmm0 = z, xmm1 = w
    cvtsi2ss  xmm0, edx
    xorps     xmm6, xmm6
    mulss     xmm0, [_am_pi_div_4]
    subss     xmm1, xmm0
    movss     xmm2, [_am_tan_p2]
    minss     xmm1, xmm5
    movss     xmm3, [_am_tan_q3]
    movaps    xmm0, xmm1
    mulss     xmm1, xmm1

    ; numerator in xmm2 and denominator in xmm3, evaluated side by side;
    ; xmm0 receives the sign of x and the numerator is multiplied by it
    mulss     xmm2, xmm1
    addss     xmm3, xmm1
    addss     xmm2, [_am_tan_p1]
    mulss     xmm3, xmm1
    mulss     xmm2, xmm1
    addss     xmm3, [_am_tan_q2]
    addss     xmm2, [_am_tan_p0]
    mulss     xmm3, xmm1
    mulss     xmm2, xmm1
    addss     xmm3, [_am_tan_q1]
    xorps     xmm0, xmm7
    mulss     xmm3, xmm1
    mulss     xmm2, xmm0
    addss     xmm3, [_am_tan_q0]

    ; xmm4 = refined 1/Q: r = rcpss(Q), then 2r - Q*r*r.
    ; The flags set by test survive the SSE arithmetic up to the jz.
    rcpss     xmm4, xmm3
    mulss     xmm3, xmm4
    mulss     xmm3, xmm4
    addss     xmm4, xmm4
    test      eax, 0x2
    subss     xmm4, xmm3
    mulss     xmm2, xmm4
    jz        direct

    ; reciprocal case: xmm2 = T; the comiss flags survive until the jz
    addss     xmm2, xmm0
    comiss    xmm6, xmm1
    rcpss     xmm4, xmm2
    movss     xmm0, [_am_sign_mask]
    jz        at_pole

    ; refine 1/T with one Newton-Raphson step and negate it
    mulss     xmm2, xmm4
    mulss     xmm2, xmm4
    addss     xmm4, xmm4
    subss     xmm4, xmm2
    xorps     xmm0, xmm4
    jmp       done

  at_pole:
    ; _am_tan_poleval with the sign of T, then the sign bit flipped
    movss     xmm1, [_am_tan_poleval]
    movaps    xmm3, xmm0
    andps     xmm0, xmm2
    orps      xmm0, xmm1
    xorps     xmm0, xmm3
    jmp       done

  direct:
    ; result = z + z * P / Q
    addss     xmm0, xmm2

  done:
}

; am_tan_ps - packed (4 x single) tangent approximation.
;   in:       xmm0 = four values, treated as radians
;   out:      xmm0 = approximation of tan for each lane
;   clobbers: eax, flags, xmm1-xmm7
; Per lane: j = trunc(|x| * 4/pi) is rounded up to an even value and
; z = |x| - j_even * pi/4 (limited to at most 1.0) is the reduced argument.
; With w = z^2 and z carrying the sign of x:
;   P = w * (p0 + w * (p1 + w * p2))
;   Q = q0 + w * (q1 + w * (q2 + w * (q3 + w)))
;   T = z + z * P / Q
; Lanes where bit 1 of (j and 7) + (j and 1) is clear return T, the others
; return -1/T. Both reciprocals use rcpps refined by one Newton-Raphson step.
; If any lane needs -1/T while its w compares equal to zero, the slower path
; substitutes a value built from _am_tan_poleval for that lane.
; The _am_tan_* tables and _am_tan_poleval are read as aligned 16-byte
; operands here, so each of them has to provide four dwords on a 16-byte
; boundary.
macro am_tan_ps {
    local has_pole, done

    ; xmm7 = sign bits, xmm1 = |x|, xmm0 = j = trunc(|x| * 4/pi)
    movaps    xmm7, xmm0
    andps     xmm0, dqword [_am_inv_sign_mask]
    andps     xmm7, dqword [_am_sign_mask]
    movaps    xmm1, xmm0
    mulps     xmm0, dqword [_am_4_div_pi]
    cvttps2dq xmm0, xmm0

    ; xmm0 = j rounded up to even, xmm5 = (j and 7) + (j and 1)
    movdqa    xmm4, dqword [am_v4_i32_1]
    movdqa    xmm5, dqword [am_v4_i32_7]
    pand      xmm4, xmm0
    pand      xmm5, xmm0
    movaps    xmm3, dqword [am_v4_f32_1]
    paddd     xmm0, xmm4
    paddd     xmm5, xmm4

    ; z = |x| - j_even * pi/4, limited to at most 1.0; xmm0 = z, xmm1 = w
    cvtdq2ps  xmm0, xmm0
    mulps     xmm0, dqword [_am_pi_div_4]
    xorps     xmm6, xmm6
    subps     xmm1, xmm0
    movaps    xmm2, dqword [_am_tan_p2]
    minps     xmm1, xmm3
    movaps    xmm3, dqword [_am_tan_q3]
    movaps    xmm0, xmm1
    mulps     xmm1, xmm1

    ; numerator in xmm2 and denominator in xmm3, evaluated side by side;
    ; xmm0 receives the sign of x, xmm5 is reduced to its bit 1
    mulps     xmm2, xmm1
    addps     xmm3, xmm1
    addps     xmm2, dqword [_am_tan_p1]
    mulps     xmm3, xmm1
    mulps     xmm2, xmm1
    addps     xmm3, dqword [_am_tan_q2]
    addps     xmm2, dqword [_am_tan_p0]
    mulps     xmm3, xmm1
    mulps     xmm2, xmm1
    addps     xmm3, dqword [_am_tan_q1]
    xorps     xmm0, xmm7
    mulps     xmm3, xmm1
    pand      xmm5, dqword [am_v4_i32_2]
    addps     xmm3, dqword [_am_tan_q0]
    mulps     xmm2, xmm0

    ; xmm4 = refined 1/Q. Alongside: xmm5 = all-ones in lanes that return T
    ; directly, xmm6 = lanes where w is nonzero or T is returned directly.
    cmpneqps  xmm6, xmm1
    rcpps     xmm4, xmm3
    pxor      xmm7, xmm7
    mulps     xmm3, xmm4
    pcmpeqd   xmm5, xmm7
    mulps     xmm3, xmm4
    addps     xmm4, xmm4
    orps      xmm6, xmm5
    subps     xmm4, xmm3

    ; xmm2 = T; eax collects the per-lane bits of xmm6
    mulps     xmm2, xmm4
    movaps    xmm1, dqword [_am_sign_mask]
    movmskps  eax, xmm6
    addps     xmm2, xmm0

    ; xmm0 = T, xmm4 = refined 1/T. The cmp flags survive the SSE
    ; arithmetic up to the jne.
    rcpps     xmm4, xmm2
    cmp       eax, 0xf
    movaps    xmm0, xmm2
    mulps     xmm2, xmm4
    mulps     xmm2, xmm4
    addps     xmm4, xmm4
    subps     xmm4, xmm2
    jne       has_pole

    ; all four bits set: result = direct lanes ? T : -(1/T)
    xorps     xmm4, xmm1
    andps     xmm0, xmm5
    andnps    xmm5, xmm4
    orps      xmm0, xmm5
    jmp       done

  has_pole:
    ; lanes cleared in xmm6 take _am_tan_poleval with the sign of T in place
    ; of 1/T; the negation and final select are the same as above
    movaps    xmm7, xmm1
    movaps    xmm3, dqword [_am_tan_poleval]
    andps     xmm1, xmm0
    orps      xmm3, xmm1
    andps     xmm4, xmm6
    andnps    xmm6, xmm3
    orps      xmm4, xmm6
    xorps     xmm4, xmm7
    andps     xmm0, xmm5
    andnps    xmm5, xmm4
    orps      xmm0, xmm5

  done:
}

; am_atan_ss - scalar single-precision arctangent approximation.
;   in:       xmm0 (low lane) = x
;   out:      xmm0 (low lane) = approximation of atan(x)
;   clobbers: flags, xmm1-xmm7
; Method: with z = x on the first path below and z = rcpss(x) on the second,
; and w = z^2, the nested quotient
;   a0 = t0 / (s0 + w)
;   a1 = t1 / (a0 + s1 + w)
;   a2 = t2 / (a1 + s2 + w)
;   r  = z * t3 / (a2 + s3 + w)
; is evaluated with unrefined rcpss reciprocals. The first path returns r,
; the second returns (pi/2 with the sign of x) - r. The first path is taken
; when -|x| is not below -1.0.
macro am_atan_ss {
    local inside_unit, done

    ; xmm1 = -|x|, xmm4 = xmm6 = rcpss(x); the comiss flags survive until
    ; the jnc
    movss     xmm1, [_am_sign_mask]
    rcpss     xmm4, xmm0
    orps      xmm1, xmm0
    movss     xmm6, xmm4
    comiss    xmm1, [am_v4_f32_m1]
    movss     xmm3, [_am_atan_t0]
    jnc       inside_unit

    ; path for -|x| below -1.0: z = rcpss(x), w = z^2 in xmm6
    mulss     xmm6, xmm6
    movss     xmm5, [_am_atan_s0]
    addss     xmm5, xmm6
    movss     xmm7, [_am_atan_s1]
    rcpss     xmm5, xmm5
    mulss     xmm5, xmm3
    movss     xmm3, [_am_atan_t1]
    addss     xmm7, xmm6
    addss     xmm5, xmm7
    movss     xmm7, [_am_atan_s2]
    rcpss     xmm5, xmm5
    mulss     xmm5, xmm3
    movss     xmm3, [_am_atan_t2]
    addss     xmm7, xmm6
    addss     xmm5, xmm7
    movss     xmm7, [_am_atan_s3]
    rcpss     xmm5, xmm5
    mulss     xmm5, xmm3
    movss     xmm3, [_am_atan_t3]
    addss     xmm7, xmm6
    movss     xmm2, [_am_sign_mask]
    mulss     xmm4, xmm3
    addss     xmm5, xmm7
    movss     xmm7, [_am_pi_div_2]
    rcpss     xmm5, xmm5
    mulss     xmm5, xmm4
    ; result = (pi/2 with the sign of x) - r
    andps     xmm0, xmm2
    orps      xmm0, xmm7
    subss     xmm0, xmm5
    jmp       done

  inside_unit:
    ; z = x, w = x^2 in xmm2; the result is r itself
    movaps    xmm2, xmm0
    mulss     xmm2, xmm2
    movss     xmm1, [_am_atan_s0]
    addss     xmm1, xmm2
    movss     xmm7, [_am_atan_s1]
    rcpss     xmm1, xmm1
    mulss     xmm1, xmm3
    movss     xmm3, [_am_atan_t1]
    addss     xmm7, xmm2
    addss     xmm1, xmm7
    movss     xmm7, [_am_atan_s2]
    rcpss     xmm1, xmm1
    mulss     xmm1, xmm3
    movss     xmm3, [_am_atan_t2]
    addss     xmm7, xmm2
    addss     xmm1, xmm7
    movss     xmm7, [_am_atan_s3]
    rcpss     xmm1, xmm1
    mulss     xmm1, xmm3
    movss     xmm3, [_am_atan_t3]
    addss     xmm7, xmm2
    mulss     xmm0, xmm3
    addss     xmm1, xmm7
    rcpss     xmm1, xmm1
    mulss     xmm0, xmm1

  done:
}

; am_atan_ps - packed (4 x single) arctangent approximation.
;   in:       xmm0 = four values
;   out:      xmm0 = approximation of atan for each lane
;   clobbers: xmm1-xmm7
; Branch-free. A lane mask m is set where 1.0 < x or where -1.0 <= x does
; not hold. Per lane z = rcpps(x) where m is set, else z = x; with w = z^2:
;   a0 = t0 / (s0 + w)
;   a1 = t1 / (a0 + s1 + w)
;   a2 = t2 / (a1 + s2 + w)
;   r  = z * t3 / (a2 + s3 + w)
; using unrefined rcpps reciprocals. Lanes with m set return
; (pi/2 with the sign of z) - r, the others return r.
macro am_atan_ps {
    ; build the mask m in xmm5 (copy kept in xmm7), z in xmm4 and xmm0
    movaps    xmm5, dqword [am_v4_f32_1]
    movaps    xmm6, dqword [am_v4_f32_m1]
    rcpps     xmm4, xmm0
    cmpltps   xmm5, xmm0
    cmpnleps  xmm6, xmm0
    movaps    xmm1, dqword [_am_atan_s0]
    orps      xmm5, xmm6
    andps     xmm4, xmm5
    movaps    xmm2, dqword [_am_atan_t0]
    movaps    xmm7, xmm5
    andnps    xmm5, xmm0
    movaps    xmm3, dqword [_am_atan_s1]
    orps      xmm4, xmm5
    movaps    xmm0, xmm4

    ; w = z^2 in xmm4; nested quotient accumulates in xmm1
    movaps    xmm6, dqword [_am_atan_t1]
    mulps     xmm4, xmm4
    addps     xmm1, xmm4
    movaps    xmm5, dqword [_am_atan_s2]
    rcpps     xmm1, xmm1
    mulps     xmm1, xmm2
    movaps    xmm2, dqword [_am_atan_t2]
    addps     xmm3, xmm4
    addps     xmm1, xmm3
    movaps    xmm3, dqword [_am_atan_s3]
    rcpps     xmm1, xmm1
    mulps     xmm1, xmm6
    movaps    xmm6, dqword [_am_atan_t3]
    addps     xmm5, xmm4
    addps     xmm1, xmm5
    movaps    xmm5, dqword [_am_sign_mask]
    rcpps     xmm1, xmm1
    mulps     xmm1, xmm2
    addps     xmm3, xmm4
    movaps    xmm4, dqword [_am_pi_div_2]
    mulps     xmm6, xmm0
    addps     xmm1, xmm3
    andps     xmm0, xmm5
    rcpps     xmm1, xmm1
    mulps     xmm1, xmm6

    ; xmm0 = (pi/2 with the sign of z) - r; select per lane with m
    orps      xmm0, xmm4
    subps     xmm0, xmm1
    andps     xmm0, xmm7
    andnps    xmm7, xmm1
    orps      xmm0, xmm7
}

; am_atanr2_ss - scalar two-argument arctangent approximation.
;   in:       xmm0 (low lane) = a, xmm1 (low lane) = b
;   out:      xmm0 (low lane) = result
;   clobbers: xmm1-xmm9 (xmm8 and xmm9 exist only in 64-bit mode)
; Computes u = a * b and approximates atan(u) with the masked scheme used by
; the packed arctangent: z = rcpss(u) where 1.0 < u or -1.0 <= u fails,
; else z = u, followed by the nested quotient on w = z^2. Where 0 <= b holds
; the result is atan(u); otherwise pi carrying the sign of a is added.
; NOTE(review): the name and the product a * b suggest that b is expected to
; be the reciprocal of the atan2 denominator - confirm against callers.
macro am_atanr2_ss {
    ; xmm2 = pi with the sign of a, xmm3 = mask for 0 <= b, xmm0 = u = a * b
    movss     xmm2, [_am_sign_mask]
    xorps     xmm3, xmm3
    movss     xmm5, [am_v4_f32_1]
    andps     xmm2, xmm0
    mulss     xmm0, xmm1
    orps      xmm2, dqword [am_v4_f32_pi]
    cmpless   xmm3, xmm1

    ; mask m in xmm5 for the reciprocal case; park xmm2 / xmm3 in xmm8 / xmm9
    movss     xmm6, [am_v4_f32_m1]
    rcpss     xmm4, xmm0
    cmpltss   xmm5, xmm0
    cmpnless  xmm6, xmm0
    movss     xmm1, [_am_atan_s0]
    orps      xmm5, xmm6
    movss     xmm8, xmm2
    movss     xmm9, xmm3

    ; z = m ? rcpss(u) : u, kept in xmm0; mask copy in xmm7
    andps     xmm4, xmm5
    movss     xmm2, [_am_atan_t0]
    movaps    xmm7, xmm5
    andnps    xmm5, xmm0
    movss     xmm3, [_am_atan_s1]
    orps      xmm4, xmm5
    movaps    xmm0, xmm4

    ; w = z^2 in xmm4; nested quotient accumulates in xmm1
    movss     xmm6, [_am_atan_t1]
    mulss     xmm4, xmm4
    addss     xmm1, xmm4
    movss     xmm5, [_am_atan_s2]
    rcpss     xmm1, xmm1
    mulss     xmm1, xmm2
    movss     xmm2, [_am_atan_t2]
    addss     xmm3, xmm4
    addss     xmm1, xmm3
    movss     xmm3, [_am_atan_s3]
    rcpss     xmm1, xmm1
    mulss     xmm1, xmm6
    movss     xmm6, [_am_atan_t3]
    addss     xmm5, xmm4
    addss     xmm1, xmm5
    movss     xmm5, [_am_sign_mask]
    rcpss     xmm1, xmm1
    mulss     xmm1, xmm2
    addss     xmm3, xmm4
    movss     xmm4, [_am_pi_div_2]
    mulss     xmm6, xmm0
    addss     xmm1, xmm3
    andps     xmm0, xmm5
    rcpss     xmm1, xmm1
    movss     xmm3, xmm9
    mulss     xmm1, xmm6

    ; atan(u) = m ? (pi/2 with the sign of z) - r : r
    orps      xmm0, xmm4
    subss     xmm0, xmm1
    movss     xmm2, xmm8
    andps     xmm0, xmm7
    andnps    xmm7, xmm1
    orps      xmm0, xmm7

    ; keep atan(u) where 0 <= b, otherwise add the signed pi
    movaps    xmm1, xmm0
    andps     xmm0, xmm3
    addss     xmm1, xmm2
    andnps    xmm3, xmm1
    orps      xmm0, xmm3
}

; am_atanr2_ps - packed (4 x single) two-argument arctangent approximation.
;   in:       xmm0 = a (four lanes), xmm1 = b (four lanes)
;   out:      xmm0 = result per lane
;   clobbers: xmm1-xmm9 (xmm8 and xmm9 exist only in 64-bit mode)
; Per lane: u = a * b, then atan(u) is approximated with z = rcpps(u) where
; 1.0 < u or -1.0 <= u fails, else z = u, followed by the nested quotient on
; w = z^2. Lanes where 0 <= b holds return atan(u); the others add pi
; carrying the sign of a.
; NOTE(review): the name and the product a * b suggest that b is expected to
; be the reciprocal of the atan2 denominator - confirm against callers.
macro am_atanr2_ps {
    ; xmm2 = pi with the sign of a, xmm3 = mask for 0 <= b, xmm0 = u = a * b
    movaps    xmm2, dqword [_am_sign_mask]
    xorps     xmm3, xmm3
    movaps    xmm5, dqword [am_v4_f32_1]
    andps     xmm2, xmm0
    mulps     xmm0, xmm1
    orps      xmm2, dqword [am_v4_f32_pi]
    cmpleps   xmm3, xmm1

    ; mask m in xmm5 for the reciprocal case; park xmm2 / xmm3 in xmm8 / xmm9
    movaps    xmm6, dqword [am_v4_f32_m1]
    rcpps     xmm4, xmm0
    cmpltps   xmm5, xmm0
    cmpnleps  xmm6, xmm0
    movaps    xmm1, dqword [_am_atan_s0]
    orps      xmm5, xmm6
    movaps    xmm8, xmm2
    movaps    xmm9, xmm3

    ; z = m ? rcpps(u) : u, kept in xmm0; mask copy in xmm7
    andps     xmm4, xmm5
    movaps    xmm2, dqword [_am_atan_t0]
    movaps    xmm7, xmm5
    andnps    xmm5, xmm0
    movaps    xmm3, dqword [_am_atan_s1]
    orps      xmm4, xmm5
    movaps    xmm0, xmm4

    ; w = z^2 in xmm4; nested quotient accumulates in xmm1
    movaps    xmm6, dqword [_am_atan_t1]
    mulps     xmm4, xmm4
    addps     xmm1, xmm4
    movaps    xmm5, dqword [_am_atan_s2]
    rcpps     xmm1, xmm1
    mulps     xmm1, xmm2
    movaps    xmm2, dqword [_am_atan_t2]
    addps     xmm3, xmm4
    addps     xmm1, xmm3
    movaps    xmm3, dqword [_am_atan_s3]
    rcpps     xmm1, xmm1
    mulps     xmm1, xmm6
    movaps    xmm6, dqword [_am_atan_t3]
    addps     xmm5, xmm4
    addps     xmm1, xmm5
    movaps    xmm5, dqword [_am_sign_mask]
    rcpps     xmm1, xmm1
    mulps     xmm1, xmm2
    addps     xmm3, xmm4
    movaps    xmm4, dqword [_am_pi_div_2]
    mulps     xmm6, xmm0
    addps     xmm1, xmm3
    andps     xmm0, xmm5
    rcpps     xmm1, xmm1
    movaps    xmm3, xmm9
    mulps     xmm1, xmm6

    ; atan(u) = m ? (pi/2 with the sign of z) - r : r
    orps      xmm0, xmm4
    subps     xmm0, xmm1
    movaps    xmm2, xmm8
    andps     xmm0, xmm7
    andnps    xmm7, xmm1
    orps      xmm0, xmm7

    ; keep atan(u) where 0 <= b, otherwise add the signed pi
    movaps    xmm1, xmm0
    andps     xmm0, xmm3
    addps     xmm1, xmm2
    andnps    xmm3, xmm1
    orps      xmm0, xmm3
}

; am_exp_ss - scalar single-precision e^x approximation.
;   in:       xmm0 (low lane) = x
;   out:      xmm0 (low lane) = approximation of exp(x)
;   clobbers: xmm1-xmm7
; Method: x is clamped to [_am_exp_lo, _am_exp_hi]. With
; t = x * (1/ln 2) + 0.5, n = trunc(t), minus 1 when 0 < t does not hold.
; The reduced argument is r = x - n*c2 - n*c1. With v = r^2:
;   P = r + r * v * (p1 + v * p0)
;   Q = q3 + v * (q2 + v * (q1 + v * q0))
;   result = (1 + 2 * P / (Q - P)) * 2^n
; 1/(Q - P) is an unrefined rcpss; 2^n is built by placing n + 127 in the
; exponent field.
macro am_exp_ss {
    ; clamp x and form t = x * (1/ln 2) + 0.5 in xmm1
    minss     xmm0, [_am_exp_hi]
    movss     xmm1, [_am_exp_rln2]
    maxss     xmm0, [_am_exp_lo]
    mulss     xmm1, xmm0
    movd      xmm3, [am_v4_i32_1]
    xorps     xmm2, xmm2
    addss     xmm1, [am_v4_f32_0p5]

    ; n = trunc(t), minus 1 when 0 < t fails
    cmpnltss  xmm2, xmm1
    pand      xmm2, xmm3
    cvttps2dq xmm1, xmm1
    movss     xmm4, [_am_exp_c2]
    psubd     xmm1, xmm2

    ; r = x - n*c2 - n*c1; xmm1 becomes n + 127
    movss     xmm5, [_am_exp_c1]
    cvtdq2ps  xmm3, xmm1
    movd      xmm7, [_am_0x7f]
    mulss     xmm4, xmm3
    mulss     xmm5, xmm3
    movss     xmm6, [_am_exp_q0]
    subss     xmm0, xmm4
    movss     xmm4, [_am_exp_p0]
    subss     xmm0, xmm5
    paddd     xmm1, xmm7

    ; xmm2 = r, xmm0 = v; P accumulates in xmm4 / xmm2, Q in xmm6
    movss     xmm2, xmm0
    mulss     xmm0, xmm0
    movss     xmm5, [_am_exp_q1]
    mulss     xmm6, xmm0
    movss     xmm3, [_am_exp_p1]
    mulss     xmm4, xmm0
    addss     xmm6, xmm5
    movss     xmm5, [_am_exp_q2]
    addss     xmm4, xmm3
    movss     xmm3, [_am_exp_q3]
    mulss     xmm6, xmm0
    mulss     xmm4, xmm0
    addss     xmm6, xmm5
    mulss     xmm4, xmm2
    mulss     xmm6, xmm0
    movss     xmm0, [am_v4_f32_1]
    addss     xmm2, xmm4
    addss     xmm6, xmm3

    ; xmm1 = bit pattern of 2^n; result = (1 + 2P / (Q - P)) * 2^n
    pslld     xmm1, 23
    subss     xmm6, xmm2
    rcpss     xmm6, xmm6
    mulss     xmm2, xmm6
    addss     xmm2, xmm2
    addss     xmm0, xmm2
    mulss     xmm0, xmm1
}

; am_exp_ps - packed (4 x single) e^x approximation.
;   in:       xmm0 = four values
;   out:      xmm0 = approximation of exp for each lane
;   clobbers: xmm1-xmm6
; Per lane: x is clamped to [_am_exp_lo, _am_exp_hi]. With
; t = x * (1/ln 2) + 0.5, n = trunc(t), minus 1 when 0 < t does not hold.
; r = x - n*c2 - n*c1 and, with v = r^2:
;   P = r + r * v * (p1 + v * p0)
;   Q = q3 + v * (q2 + v * (q1 + v * q0))
;   result = (1 + 2 * P / (Q - P)) * 2^n
; 1/(Q - P) is an unrefined rcpps; 2^n is built by placing n + 127 in the
; exponent field.
macro am_exp_ps {
    ; clamp x and form t = x * (1/ln 2) + 0.5 in xmm1
    minps     xmm0, dqword [_am_exp_hi]
    movaps    xmm1, dqword [_am_exp_rln2]
    maxps     xmm0, dqword [_am_exp_lo]
    mulps     xmm1, xmm0
    xorps     xmm2, xmm2
    addps     xmm1, dqword [am_v4_f32_0p5]

    ; n = trunc(t), minus 1 in lanes where 0 < t fails
    cmpnltps  xmm2, xmm1
    pand      xmm2, dqword [am_v4_i32_1]
    cvttps2dq xmm1, xmm1
    movaps    xmm4, dqword [_am_exp_c2]
    psubd     xmm1, xmm2

    ; r = x - n*c2 - n*c1; xmm1 becomes n + 127
    movaps    xmm5, dqword [_am_exp_c1]
    cvtdq2ps  xmm3, xmm1
    mulps     xmm4, xmm3
    mulps     xmm5, xmm3
    movaps    xmm6, dqword [_am_exp_q0]
    subps     xmm0, xmm4
    movaps    xmm4, dqword [_am_exp_p0]
    subps     xmm0, xmm5
    paddd     xmm1, dqword [_am_0x7f]

    ; xmm2 = r, xmm0 = v; P accumulates in xmm4 / xmm2, Q in xmm6
    movaps    xmm2, xmm0
    mulps     xmm0, xmm0
    movaps    xmm5, dqword [_am_exp_q1]
    mulps     xmm6, xmm0
    movaps    xmm3, dqword [_am_exp_p1]
    mulps     xmm4, xmm0
    addps     xmm6, xmm5
    movaps    xmm5, dqword [_am_exp_q2]
    addps     xmm4, xmm3
    movaps    xmm3, dqword [_am_exp_q3]
    mulps     xmm6, xmm0
    mulps     xmm4, xmm0
    addps     xmm6, xmm5
    mulps     xmm4, xmm2
    mulps     xmm6, xmm0
    movaps    xmm0, dqword [am_v4_f32_1]
    addps     xmm2, xmm4
    addps     xmm6, xmm3

    ; xmm1 = bit pattern of 2^n; result = (1 + 2P / (Q - P)) * 2^n
    pslld     xmm1, 23
    subps     xmm6, xmm2
    rcpps     xmm6, xmm6
    mulps     xmm2, xmm6
    addps     xmm2, xmm2
    addps     xmm0, xmm2
    mulps     xmm0, xmm1
}

; am_exp2_ss - scalar single-precision 2^x approximation.
;   in:       xmm0 (low lane) = x
;   out:      xmm0 (low lane) = approximation of 2^x
;   clobbers: eax, ecx, edx, flags, xmm2, xmm4-xmm7
; Method: x is clamped to [_am_exp2_lo, _am_exp2_hi]. With t = x + 0.5,
; n = trunc(t), minus 1 when comiss reports t below zero. r = x - n and,
; with v = r^2:
;   P = r * (p2 + v * (p1 + v * p0))
;   Q = q1 + v * q0
;   result = (1 + 2 * P / (Q - P)) * 2^n
; 1/(Q - P) is an unrefined rcpss; 2^n is built in eax as (n + 127) shl 23.
macro am_exp2_ss {
    ; clamp x, t = x + 0.5 in xmm5
    minss     xmm0, [_am_exp2_hi]
    movss     xmm5, [am_v4_f32_0p5]
    maxss     xmm0, [_am_exp2_lo]
    xorps     xmm7, xmm7
    addss     xmm5, xmm0

    ; eax = n; the carry from comiss is still intact at the cmovc
    xor       ecx, ecx
    mov       edx, 1
    comiss    xmm5, xmm7
    cvttss2si eax, xmm5
    cmovc     ecx, edx
    sub       eax, ecx

    ; r = x - n in xmm2, v = r^2 in xmm0; eax becomes n + 127
    cvtsi2ss  xmm5, eax
    add       eax, 0x7f
    subss     xmm0, xmm5
    movss     xmm2, xmm0
    mulss     xmm0, xmm0

    ; Q in xmm6 and P in xmm4; eax is turned into the bit pattern of 2^n and
    ; moved to xmm0 once v is no longer needed
    movss     xmm6, [_am_exp2_q0]
    movss     xmm4, [_am_exp2_p0]
    mulss     xmm6, xmm0
    movss     xmm7, [_am_exp2_q1]
    mulss     xmm4, xmm0
    movss     xmm5, [_am_exp2_p1]
    shl       eax, 23
    addss     xmm6, xmm7
    addss     xmm4, xmm5
    movss     xmm5, [_am_exp2_p2]
    mulss     xmm4, xmm0
    movd      xmm0, eax
    addss     xmm4, xmm5
    mulss     xmm4, xmm2

    ; result = 2^n * (1 + 2P / (Q - P))
    subss     xmm6, xmm4
    movss     xmm7, [am_v4_f32_1]
    rcpss     xmm6, xmm6
    mulss     xmm4, xmm6
    addss     xmm4, xmm4
    addss     xmm4, xmm7
    mulss     xmm0, xmm4
}

; am_exp2_ps - packed (4 x single) 2^x approximation.
;   in:       xmm0 = four values
;   out:      xmm0 = approximation of 2^x for each lane
;   clobbers: xmm1, xmm2, xmm4, xmm6
; Per lane: x is clamped to [_am_exp2_lo, _am_exp2_hi]. With t = x + 0.5,
; n = trunc(t), minus 1 when 0 < t does not hold. r = x - n and, with
; v = r^2:
;   P = r * (p2 + v * (p1 + v * p0))
;   Q = q1 + v * q0
;   result = (1 + 2 * P / (Q - P)) * 2^n
; 1/(Q - P) is an unrefined rcpps; 2^n is built by placing n + 127 in the
; exponent field.
macro am_exp2_ps {
    ; clamp x, t = x + 0.5 in xmm1
    minps     xmm0, dqword [_am_exp2_hi]
    movaps    xmm1, dqword [am_v4_f32_0p5]
    maxps     xmm0, dqword [_am_exp2_lo]
    xorps     xmm2, xmm2
    addps     xmm1, xmm0

    ; n = trunc(t), minus 1 in lanes where 0 < t fails
    cmpnltps  xmm2, xmm1
    pand      xmm2, dqword [am_v4_i32_1]
    cvttps2dq xmm1, xmm1
    psubd     xmm1, xmm2

    ; r = x - n in xmm2, v = r^2 in xmm0; xmm1 becomes n + 127
    movaps    xmm4, dqword [_am_exp2_p0]
    movaps    xmm6, dqword [_am_exp2_q0]
    cvtdq2ps  xmm2, xmm1
    subps     xmm0, xmm2
    movaps    xmm2, xmm0
    mulps     xmm0, xmm0
    paddd     xmm1, dqword [_am_0x7f]

    ; P ends up in xmm2, Q in xmm6
    mulps     xmm4, xmm0
    mulps     xmm6, xmm0
    addps     xmm4, dqword [_am_exp2_p1]
    addps     xmm6, dqword [_am_exp2_q1]
    mulps     xmm4, xmm0
    addps     xmm4, dqword [_am_exp2_p2]
    mulps     xmm2, xmm4

    ; xmm1 = bit pattern of 2^n; result = (1 + 2P / (Q - P)) * 2^n
    subps     xmm6, xmm2
    pslld     xmm1, 23
    rcpps     xmm6, xmm6
    movaps    xmm0, dqword [am_v4_f32_1]
    mulps     xmm2, xmm6
    addps     xmm2, xmm2
    addps     xmm0, xmm2
    mulps     xmm0, xmm1
}

; am_log_ss - scalar single-precision natural logarithm approximation.
;   in:       xmm0 (low lane) = x
;   out:      xmm0 (low lane) = approximation of ln(x)
;   clobbers: edx, flags, xmm1, xmm2, xmm4-xmm7
; Method: x is first raised to at least _am_min_norm_pos. The exponent
; e = (bits shr 23) - 127 is taken from the bit pattern, and the exponent
; field is replaced with that of 1.0 to give m. With
; z = 2 * (m - 1) / (m + 1) and w = z^2:
;   P = p2 + w * (p1 + w * p0)
;   Q = q2 + w * (q1 + w * q0)
;   result = z * w * P / Q + z + e * c0
; Both reciprocals are unrefined rcpss results.
macro am_log_ss {
    ; clamp, grab the raw bits in edx, rebuild the mantissa m in xmm0
    maxss     xmm0, [_am_min_norm_pos]
    movss     xmm1, [am_v4_f32_1]
    movd      edx, xmm0
    andps     xmm0, dqword [_am_inv_mant_mask]
    orps      xmm0, xmm1

    ; z = 2 * (m - 1) / (m + 1); edx is shifted down to the exponent field
    movaps    xmm4, xmm0
    subss     xmm0, xmm1
    addss     xmm4, xmm1
    shr       edx, 23
    rcpss     xmm4, xmm4
    mulss     xmm0, xmm4
    addss     xmm0, xmm0

    ; xmm2 = z, xmm0 = w, edx = e
    movaps    xmm2, xmm0
    mulss     xmm0, xmm0
    sub       edx, 0x7f

    ; P in xmm4 and Q in xmm6, evaluated side by side; xmm1 = e as float
    movss     xmm4, [_am_log_p0]
    movss     xmm6, [_am_log_q0]
    mulss     xmm4, xmm0
    movss     xmm5, [_am_log_p1]
    mulss     xmm6, xmm0
    movss     xmm7, [_am_log_q1]
    addss     xmm4, xmm5
    addss     xmm6, xmm7
    movss     xmm5, [_am_log_p2]
    mulss     xmm4, xmm0
    movss     xmm7, [_am_log_q2]
    mulss     xmm6, xmm0
    addss     xmm4, xmm5
    movss     xmm5, [_am_log_c0]
    addss     xmm6, xmm7
    cvtsi2ss  xmm1, edx

    ; result = z * w * P / Q + z + e * c0
    mulss     xmm0, xmm4
    rcpss     xmm6, xmm6
    mulss     xmm0, xmm6
    mulss     xmm0, xmm2
    mulss     xmm1, xmm5
    addss     xmm0, xmm2
    addss     xmm0, xmm1
}

; am_log_ps - packed (4 x single) natural logarithm approximation.
;   in:       xmm0 = four values
;   out:      xmm0 = approximation of ln for each lane
;   clobbers: xmm1-xmm7
; Per lane: x is first raised to at least _am_min_norm_pos. The exponent
; e = (bits shr 23) - 127 is taken from the bit pattern, and the exponent
; field is replaced with that of 1.0 to give m. With
; z = 2 * (m - 1) / (m + 1) and w = z^2:
;   P = p2 + w * (p1 + w * p0)
;   Q = q2 + w * (q1 + w * q0)
;   result = z * w * P / Q + z + e * c0
; Both reciprocals are unrefined rcpps results.
macro am_log_ps {
    ; clamp, keep the raw bits in xmm3, rebuild the mantissa m in xmm0
    maxps     xmm0, dqword [_am_min_norm_pos]
    movaps    xmm1, dqword [am_v4_f32_1]
    movaps    xmm3, xmm0
    andps     xmm0, dqword [_am_inv_mant_mask]
    orps      xmm0, xmm1

    ; z = 2 * (m - 1) / (m + 1); xmm3 becomes e
    movaps    xmm4, xmm0
    subps     xmm0, xmm1
    addps     xmm4, xmm1
    psrld     xmm3, 23
    rcpps     xmm4, xmm4
    mulps     xmm0, xmm4
    psubd     xmm3, dqword [_am_0x7f]
    addps     xmm0, xmm0

    ; xmm2 = z, xmm0 = w
    movaps    xmm2, xmm0
    mulps     xmm0, xmm0

    ; P in xmm4 and Q in xmm6, evaluated side by side; xmm1 = e as float
    movaps    xmm4, dqword [_am_log_p0]
    movaps    xmm6, dqword [_am_log_q0]
    mulps     xmm4, xmm0
    movaps    xmm5, dqword [_am_log_p1]
    mulps     xmm6, xmm0
    movaps    xmm7, dqword [_am_log_q1]
    addps     xmm4, xmm5
    addps     xmm6, xmm7
    movaps    xmm5, dqword [_am_log_p2]
    mulps     xmm4, xmm0
    movaps    xmm7, dqword [_am_log_q2]
    mulps     xmm6, xmm0
    addps     xmm4, xmm5
    movaps    xmm5, dqword [_am_log_c0]
    addps     xmm6, xmm7
    cvtdq2ps  xmm1, xmm3

    ; result = z * w * P / Q + z + e * c0
    mulps     xmm0, xmm4
    rcpps     xmm6, xmm6
    mulps     xmm0, xmm6
    mulps     xmm0, xmm2
    mulps     xmm1, xmm5
    addps     xmm0, xmm2
    addps     xmm0, xmm1
}

; am_log2_ss - scalar single-precision base-2 logarithm approximation.
;   in:       xmm0 (low lane) = x
;   out:      xmm0 (low lane) = approximation of log2(x)
;   clobbers: xmm1-xmm7
; Method: x is first raised to at least _am_min_norm_pos. The exponent
; e = (bits shr 23) - 127 is taken from the bit pattern, and the exponent
; field is replaced with that of 1.0 to give m. With
; z = 2 * (m - 1) / (m + 1), w = z^2 and c = _am_log2_c0:
;   P = p2 + w * (p1 + w * p0)
;   Q = q2 + w * (q1 + w * q0)
;   result = (z * w * P / Q) * c + z * c + e
; Both reciprocals are unrefined rcpss results.
macro am_log2_ss {
    ; clamp, keep the raw bits in xmm3, rebuild the mantissa m in xmm0
    maxss     xmm0, [_am_min_norm_pos]
    movss     xmm1, [am_v4_f32_1]
    movaps    xmm3, xmm0
    andps     xmm0, dqword [_am_inv_mant_mask]
    orps      xmm0, xmm1

    ; z = 2 * (m - 1) / (m + 1); xmm3 becomes e (xmm7 holds the integer 127)
    movaps    xmm4, xmm0
    movss     xmm7, [_am_0x7f]
    subss     xmm0, xmm1
    addss     xmm4, xmm1
    psrld     xmm3, 23
    rcpss     xmm4, xmm4
    mulss     xmm0, xmm4
    psubd     xmm3, xmm7
    addss     xmm0, xmm0

    ; xmm2 = z, xmm0 = w
    movaps    xmm2, xmm0
    mulss     xmm0, xmm0

    ; P in xmm4 and Q in xmm6, evaluated side by side; xmm1 = e as float
    movss     xmm4, [_am_log_p0]
    movss     xmm6, [_am_log_q0]
    mulss     xmm4, xmm0
    movss     xmm5, [_am_log_p1]
    mulss     xmm6, xmm0
    movss     xmm7, [_am_log_q1]
    addss     xmm4, xmm5
    addss     xmm6, xmm7
    movss     xmm5, [_am_log_p2]
    mulss     xmm4, xmm0
    movss     xmm7, [_am_log_q2]
    mulss     xmm6, xmm0
    addss     xmm4, xmm5
    movss     xmm5, [_am_log2_c0]
    addss     xmm6, xmm7
    cvtdq2ps  xmm1, xmm3

    ; result = (z * w * P / Q) * c + (z * c + e)
    mulss     xmm0, xmm4
    rcpss     xmm6, xmm6
    mulss     xmm0, xmm6
    mulss     xmm0, xmm2
    mulss     xmm2, xmm5
    mulss     xmm0, xmm5
    addss     xmm2, xmm1
    addss     xmm0, xmm2
}

; am_log2_ps - packed (4 x single) base-2 logarithm approximation.
;   in:       xmm0 = four values
;   out:      xmm0 = approximation of log2 for each lane
;   clobbers: xmm1-xmm7
; Per lane: x is first raised to at least _am_min_norm_pos. The exponent
; e = (bits shr 23) - 127 is taken from the bit pattern, and the exponent
; field is replaced with that of 1.0 to give m. With
; z = 2 * (m - 1) / (m + 1), w = z^2 and c = _am_log2_c0:
;   P = p2 + w * (p1 + w * p0)
;   Q = q2 + w * (q1 + w * q0)
;   result = (z * w * P / Q) * c + z * c + e
; Both reciprocals are unrefined rcpps results.
macro am_log2_ps {
    ; clamp, keep the raw bits in xmm3, rebuild the mantissa m in xmm0
    maxps     xmm0, dqword [_am_min_norm_pos]
    movaps    xmm1, dqword [am_v4_f32_1]
    movaps    xmm3, xmm0
    andps     xmm0, dqword [_am_inv_mant_mask]
    orps      xmm0, xmm1

    ; z = 2 * (m - 1) / (m + 1); xmm3 becomes e
    movaps    xmm4, xmm0
    subps     xmm0, xmm1
    addps     xmm4, xmm1
    psrld     xmm3, 23
    rcpps     xmm4, xmm4
    mulps     xmm0, xmm4
    psubd     xmm3, dqword [_am_0x7f]
    addps     xmm0, xmm0

    ; xmm2 = z, xmm0 = w
    movaps    xmm2, xmm0
    mulps     xmm0, xmm0

    ; P in xmm4 and Q in xmm6, evaluated side by side; xmm1 = e as float
    movaps    xmm4, dqword [_am_log_p0]
    movaps    xmm6, dqword [_am_log_q0]
    mulps     xmm4, xmm0
    movaps    xmm5, dqword [_am_log_p1]
    mulps     xmm6, xmm0
    movaps    xmm7, dqword [_am_log_q1]
    addps     xmm4, xmm5
    addps     xmm6, xmm7
    movaps    xmm5, dqword [_am_log_p2]
    mulps     xmm4, xmm0
    movaps    xmm7, dqword [_am_log_q2]
    mulps     xmm6, xmm0
    addps     xmm4, xmm5
    movaps    xmm5, dqword [_am_log2_c0]
    addps     xmm6, xmm7
    cvtdq2ps  xmm1, xmm3

    ; result = (z * w * P / Q) * c + (z * c + e)
    mulps     xmm0, xmm4
    rcpps     xmm6, xmm6
    mulps     xmm0, xmm6
    mulps     xmm0, xmm2
    mulps     xmm2, xmm5
    mulps     xmm0, xmm5
    addps     xmm2, xmm1
    addps     xmm0, xmm2
}

; am_pow_ss - scalar single-precision power approximation.
;   in:       xmm0 (low lane) = base, xmm1 (low lane) = exponent
;   out:      xmm0 (low lane) = approximation of base raised to exponent
;   clobbers: xmm2-xmm8 (xmm8 exists only in 64-bit mode); xmm1 is only read
; Method: the log2 scheme followed by the exp2 scheme, inlined.
;   1. A mask for 0 < base is saved in xmm8; base is then raised to at least
;      _am_min_norm_pos.
;   2. L = log2(base) from the exponent field and the rational
;      approximation on z = 2 * (m - 1) / (m + 1).
;   3. y = L * exponent, clamped to [_am_exp2_lo, _am_exp2_hi].
;   4. n = trunc(y + 0.5), minus 1 when 0 < y + 0.5 fails; r = y - n;
;      result = (1 + 2P / (Q - P)) * 2^n with the exp2 polynomials.
;   5. The bit pattern of 2^n is ANDed with the saved mask, so the scale
;      factor is zero when 0 < base does not hold.
macro am_pow_ss {
    ; xmm5 = mask for 0 < base (saved to xmm8 below); clamp; mantissa m
    xorps     xmm5, xmm5
    movss     xmm2, [_am_inv_mant_mask]
    cmpltss   xmm5, xmm0
    maxss     xmm0, [_am_min_norm_pos]
    movss     xmm7, [am_v4_f32_1]
    movaps    xmm3, xmm0
    andps     xmm0, xmm2
    orps      xmm0, xmm7
    movss     xmm8, xmm5

    ; z = 2 * (m - 1) / (m + 1); xmm3 becomes the unbiased exponent e
    movaps    xmm4, xmm0
    movd      xmm2, [_am_0x7f]
    subss     xmm0, xmm7
    addss     xmm4, xmm7
    psrld     xmm3, 23
    rcpss     xmm4, xmm4
    mulss     xmm0, xmm4
    psubd     xmm3, xmm2
    addss     xmm0, xmm0

    ; xmm2 = z, xmm0 = w = z^2; log polynomials P (xmm4) and Q (xmm6)
    movaps    xmm2, xmm0
    mulss     xmm0, xmm0
    movss     xmm4, [_am_log_p0]
    movss     xmm6, [_am_log_q0]
    mulss     xmm4, xmm0
    movss     xmm5, [_am_log_p1]
    mulss     xmm6, xmm0
    movss     xmm7, [_am_log_q1]
    addss     xmm4, xmm5
    addss     xmm6, xmm7
    movss     xmm5, [_am_log_p2]
    mulss     xmm4, xmm0
    movss     xmm7, [_am_log_q2]
    mulss     xmm6, xmm0
    addss     xmm4, xmm5
    movss     xmm5, [_am_log2_c0]
    addss     xmm6, xmm7
    cvtdq2ps  xmm7, xmm3

    ; L = (z * w * P / Q) * c + z * c + e, with c = _am_log2_c0
    mulss     xmm0, xmm4
    rcpss     xmm6, xmm6
    mulss     xmm0, xmm6
    movss     xmm4, [_am_exp2_hi]
    mulss     xmm0, xmm2
    movss     xmm6, [_am_exp2_lo]
    mulss     xmm2, xmm5
    mulss     xmm0, xmm5
    addss     xmm2, xmm7
    movss     xmm3, [am_v4_f32_0p5]
    addss     xmm0, xmm2

    ; y = L * exponent, clamped; xmm3 = y + 0.5
    xorps     xmm2, xmm2
    movd      xmm5, [am_v4_i32_1]
    mulss     xmm0, xmm1
    minss     xmm0, xmm4
    movss     xmm4, [_am_exp2_p0]
    maxss     xmm0, xmm6
    movss     xmm6, [_am_exp2_q0]
    addss     xmm3, xmm0

    ; n = trunc(y + 0.5), minus 1 when 0 < y + 0.5 fails; r = y - n
    cmpnltss  xmm2, xmm3
    pand      xmm2, xmm5
    cvttps2dq xmm3, xmm3
    psubd     xmm3, xmm2
    cvtdq2ps  xmm2, xmm3
    subss     xmm0, xmm2

    ; xmm2 = r, xmm0 = r^2; exp2 polynomials P (xmm2) and Q (xmm6);
    ; xmm3 becomes n + 127
    movaps    xmm2, xmm0
    mulss     xmm0, xmm0
    paddd     xmm3, dqword [_am_0x7f]
    mulss     xmm4, xmm0
    mulss     xmm6, xmm0
    addss     xmm4, [_am_exp2_p1]
    addss     xmm6, [_am_exp2_q1]
    mulss     xmm4, xmm0
    addss     xmm4, [_am_exp2_p2]
    mulss     xmm2, xmm4

    ; result = (1 + 2P / (Q - P)) * (2^n masked by 0 < base)
    movss     xmm0, [am_v4_f32_1]
    subss     xmm6, xmm2
    pslld     xmm3, 23
    rcpss     xmm6, xmm6
    movss     xmm5, xmm8
    mulss     xmm2, xmm6
    andps     xmm3, xmm5
    addss     xmm2, xmm2
    addss     xmm0, xmm2
    mulss     xmm0, xmm3
}

; am_pow_ps - packed (4 x single) power approximation.
;   in:       xmm0 = bases (four lanes), xmm1 = exponents (four lanes)
;   out:      xmm0 = approximation of base raised to exponent, per lane
;   clobbers: xmm2-xmm8 (xmm8 exists only in 64-bit mode); xmm1 is only read
; Method: the log2 scheme followed by the exp2 scheme, inlined.
;   1. A per-lane mask for 0 < base is saved in xmm8; base is then raised to
;      at least _am_min_norm_pos.
;   2. L = log2(base) from the exponent field and the rational
;      approximation on z = 2 * (m - 1) / (m + 1).
;   3. y = L * exponent, clamped to [_am_exp2_lo, _am_exp2_hi].
;   4. n = trunc(y + 0.5), minus 1 where 0 < y + 0.5 fails; r = y - n;
;      result = (1 + 2P / (Q - P)) * 2^n with the exp2 polynomials.
;   5. The bit pattern of 2^n is ANDed with the saved mask, so the scale
;      factor is zero in lanes where 0 < base does not hold.
macro am_pow_ps {
    ; xmm5 = mask for 0 < base (saved to xmm8 below); clamp; mantissa m
    xorps     xmm5, xmm5
    cmpltps   xmm5, xmm0
    maxps     xmm0, dqword [_am_min_norm_pos]
    movaps    xmm7, dqword [am_v4_f32_1]
    movaps    xmm3, xmm0
    andps     xmm0, dqword [_am_inv_mant_mask]
    orps      xmm0, xmm7
    movaps    xmm8, xmm5

    ; z = 2 * (m - 1) / (m + 1); xmm3 becomes the unbiased exponent e
    movaps    xmm4, xmm0
    subps     xmm0, xmm7
    addps     xmm4, xmm7
    psrld     xmm3, 23
    rcpps     xmm4, xmm4
    mulps     xmm0, xmm4
    psubd     xmm3, dqword [_am_0x7f]
    addps     xmm0, xmm0

    ; xmm2 = z, xmm0 = w = z^2; log polynomials P (xmm4) and Q (xmm6)
    movaps    xmm2, xmm0
    mulps     xmm0, xmm0
    movaps    xmm4, dqword [_am_log_p0]
    movaps    xmm6, dqword [_am_log_q0]
    mulps     xmm4, xmm0
    movaps    xmm5, dqword [_am_log_p1]
    mulps     xmm6, xmm0
    movaps    xmm7, dqword [_am_log_q1]
    addps     xmm4, xmm5
    addps     xmm6, xmm7
    movaps    xmm5, dqword [_am_log_p2]
    mulps     xmm4, xmm0
    movaps    xmm7, dqword [_am_log_q2]
    mulps     xmm6, xmm0
    addps     xmm4, xmm5
    movaps    xmm5, dqword [_am_log2_c0]
    addps     xmm6, xmm7
    cvtdq2ps  xmm7, xmm3

    ; L = (z * w * P / Q) * c + z * c + e, with c = _am_log2_c0
    mulps     xmm0, xmm4
    rcpps     xmm6, xmm6
    mulps     xmm0, xmm6
    movaps    xmm4, dqword [_am_exp2_hi]
    mulps     xmm0, xmm2
    movaps    xmm6, dqword [_am_exp2_lo]
    mulps     xmm2, xmm5
    mulps     xmm0, xmm5
    addps     xmm2, xmm7
    movaps    xmm3, dqword [am_v4_f32_0p5]
    addps     xmm0, xmm2

    ; y = L * exponent, clamped; xmm3 = y + 0.5
    xorps     xmm2, xmm2
    mulps     xmm0, xmm1
    minps     xmm0, xmm4
    movaps    xmm4, dqword [_am_exp2_p0]
    maxps     xmm0, xmm6
    movaps    xmm6, dqword [_am_exp2_q0]
    addps     xmm3, xmm0

    ; n = trunc(y + 0.5), minus 1 where 0 < y + 0.5 fails; r = y - n
    cmpnltps  xmm2, xmm3
    pand      xmm2, dqword [am_v4_i32_1]
    cvttps2dq xmm3, xmm3
    psubd     xmm3, xmm2
    movaps    xmm5, dqword [_am_exp2_p1]
    cvtdq2ps  xmm2, xmm3
    movaps    xmm7, dqword [_am_exp2_q1]
    subps     xmm0, xmm2

    ; xmm2 = r, xmm0 = r^2; exp2 polynomials P (xmm2) and Q (xmm6);
    ; xmm3 becomes the bit pattern of 2^n
    movaps    xmm2, xmm0
    mulps     xmm0, xmm0
    paddd     xmm3, dqword [_am_0x7f]
    mulps     xmm4, xmm0
    mulps     xmm6, xmm0
    addps     xmm4, xmm5
    addps     xmm6, xmm7
    mulps     xmm4, xmm0
    movaps    xmm5, xmm8
    pslld     xmm3, 23
    addps     xmm4, dqword [_am_exp2_p2]
    mulps     xmm2, xmm4

    ; result = (1 + 2P / (Q - P)) * (2^n masked by 0 < base)
    movaps    xmm0, dqword [am_v4_f32_1]
    subps     xmm6, xmm2
    andps     xmm3, xmm5
    rcpps     xmm6, xmm6
    mulps     xmm2, xmm6
    addps     xmm2, xmm2
    addps     xmm0, xmm2
    mulps     xmm0, xmm3
}

end if
